##
## Descriptive analysis
##

## descriptive statistics in introduction

# Load the actor-level master data (provides the data frame `Master`).
load("Daten/Master_all.RData")

# Fraction of distinct actors of one type that show sexual violence
# (max_prev > 0) in at least one active conflict year, relative to all distinct
# actors of that type observed in active conflict years.
#
# type_column: name of the indicator column in `Master` to filter on
# type_value:  value of that column that selects the actor type
#
# Returns a fraction between 0 and 1 (multiply by 100 for a percentage).
#
# which() drops rows whose condition evaluates to NA. Plain logical indexing
# would turn such rows (e.g. max_prev is NA) into an all-NA row, and unique()
# would then count the resulting NA actorid as one additional actor.
sv_actor_share <- function(type_column, type_value = 1) {
  in_scope <- which(Master[[type_column]] == type_value &
                      Master$conflictyear == 1)
  with_sv <- in_scope[which(Master$max_prev[in_scope] > 0)]
  length(unique(Master$actorid[with_sv])) /
    length(unique(Master$actorid[in_scope]))
}

# share of state actors engaged at least once in sexual violence in active conflict
sv_actor_share("actor_type_state", 1)
# share of non-state actors in general
sv_actor_share("actor_type_state", 0)
# share of non-state insurgents
sv_actor_share("actor_type_sideB_rebel", 1)
# share of pro-government militias
sv_actor_share("actor_type_sideA_PGM", 1)

# Graphics for the descriptive analytical part

## TODO
# - also create the relative plots: share of the total number of conflicts per year
##


# Session setup for the plots and tables below.
# Clears the whole workspace (hidden objects included), so everything loaded or
# defined above this line is discarded.
rm(list=ls(all=TRUE))
# NOTE(review): machine-specific absolute path. The relative paths used in this
# script ("Daten/...", "Visualization_Theme.R") are resolved against it.
setwd("/Users/nklotz/Documents/Studium/SS2014/Masterarbeit")
# plyr supplies count(); ggplot2 supplies the plotting functions.
library(plyr)
library(ggplot2)
# Presumably defines theme_MA(), which every plot below applies -- TODO confirm.
source("Visualization_Theme.R")

######################################
## TOTAL COUNT OF ACTOR-CONFLICTS SPLIT BY ACTOR-TYPES ##
######################################
# high number of actor-conflicts per year can mean several things:
#   - 1°: many actors in one conflict
#   - 2°: some actors in some conflicts
#   - 3°: one actor in many conflicts
# => from the outside we cannot distinguish between these cases; it also means:
#   - one actor may be counted several times in one year if committing SV in several conflicts
#   - one conflict may be counted several times in one year if more than one actor committed SV in this conflict

# Stacked area plot (c): number of actor-conflicts with SV per year, split by
# actor type.

# only take ongoing conflicts, not interim and post-conflict years, and keep
# only actor-conflict-years with a recorded, non-zero SV prevalence
load("Daten/Master_all.RData")
Master <- subset(Master, conflictyear == 1 & !is.na(max_prev) & max_prev != 0)

# create grouping variable (1=states, 2=PGMs, 3=other non-state actors);
# the PGM assignment runs last, so it wins for any row flagged as PGM
Master$grp <- NA
Master$grp[Master$actor_type_state == 1] <- 1
Master$grp[Master$actor_type_state == 0 & Master$actor_type_sideA_PGM == 0] <- 3
Master$grp[Master$actor_type_sideA_PGM == 1] <- 2

# N° of actor-conflicts with (any prevalence of) SV by year and actor type
counts <- plyr::count(Master, c("year", "grp"))

# geom_area needs a value for every year in every group, otherwise the stacked
# areas are interpolated across the gap. Add a zero row for each missing
# year/group cell instead of hard-coding the years that happen to be empty.
if (nrow(counts) > 0) {
  all.cells <- expand.grid(year = seq(min(counts$year), max(counts$year)),
                           grp = 1:3)
  is.missing <- !(paste(all.cells$year, all.cells$grp) %in%
                    paste(counts$year, counts$grp))
  if (any(is.missing)) {
    counts <- rbind(counts, data.frame(all.cells[is.missing, ], freq = 0))
  }
}
counts <- counts[order(counts$grp, counts$year), ] # important for stacking!

# explicit levels keep the labels attached to the right code even if one
# actor type does not occur in the data
counts$grp <- factor(counts$grp, levels = 1:3,
                     labels = c("State Actor", "Pro-Government Militia", "Non-State Actor"))

# Plot ('ymax' is not an aesthetic of geom_area and has been dropped)
g <- ggplot(counts)
g <- g + geom_area(aes(x = year, y = freq, group = grp, fill = grp), position = "stack")
g <- g + scale_y_continuous(expand = c(0, 0), limits = c(0, 40))
g <- g + scale_x_continuous(expand = c(0, 0))
g <- g + scale_fill_manual(values = c("#F2615E", "#6CA206", "#1BB3B7"),
                           name = "Actor\ntype")
g <- g + labs(title = "(c) N° of actor-conflicts per year split by actor-type", y = "Frequency", x = "")
g <- g + theme_MA()
g


# Conflicts with SV per year: absolute number (plot a) and share of all active
# conflicts (plot b).
#
# Author's notes (translated from German):
# - Originally these were not unique conflicts: when both states and
#   non-states committed SV in one conflict, it was counted twice.
# - This was changed so that only unique conflict-years are counted.
# - Open question left by the author: for 2005-2007 the numbers suggested no
#   active conflicts with SV, although the actor-conflict plots show SV then.
load("Daten/Master_all.RData")

# Numerator and denominator use the same population: ongoing conflict years
# with a recorded prevalence. Previously the denominator also counted interim
# and post-conflict years, which deflated the share.
active <- subset(Master, conflictyear == 1 & !is.na(max_prev))

# Absolute N° of conflicts with SV per year (one row per conflict-year in which
# at least one actor has a non-zero prevalence)
conflicts.with.sv <- active[active$max_prev != 0, ]
conflicts.with.sv <- conflicts.with.sv[!duplicated(conflicts.with.sv[c("conflictid", "year")]), ]
conflicts.with.sv <- plyr::count(conflicts.with.sv[c("year", "conflictid")], c("year"))

# N° of active conflicts per year (unique conflict-years)
conflicts.total <- active[!duplicated(active[c("conflictid", "year")]), ]
conflicts.total <- plyr::count(conflicts.total[c("year", "conflictid")], c("year"))

# Keep every year that has active conflicts; a year without any SV conflict
# gets a count of 0 instead of being dropped by the merge.
# freq.x = conflicts with SV, freq.y = all active conflicts
conflicts <- merge(conflicts.with.sv, conflicts.total, by = c("year"), all.y = TRUE)
conflicts$freq.x[is.na(conflicts$freq.x)] <- 0
conflicts$share <- (conflicts$freq.x / conflicts$freq.y) * 100

# total count of SV and relative share, each in an independent graph
g2 <- ggplot(conflicts) + ylim(c(0, 25))
g2 <- g2 + geom_line(aes(x = year, y = freq.x), linetype = "solid")
g2 <- g2 + labs(title = "(a) Absolute N° of conflicts with SV per year", y = "Frequency", x = "")
g2 <- g2 + theme_MA()
g2

g5 <- ggplot(conflicts) + ylim(c(0, 100))
g5 <- g5 + geom_line(aes(x = year, y = share), linetype = "dashed")
g5 <- g5 + labs(title = "(b) Relative N° of conflicts with SV per year", y = "Percentage", x = "")
g5 <- g5 + theme_MA()
g5

min(conflicts$freq.x) # yearly minimum
max(conflicts$freq.x) # yearly maximum
mean(conflicts$freq.x)
min(conflicts$share)
max(conflicts$share)
mean(conflicts$share) # average of the yearly shares
# overall percentage across all years (scaled by 100 like the shares above)
sum(conflicts$freq.x) / sum(conflicts$freq.y) * 100

# Share of distinct conflicts that feature SV at least once, among distinct
# conflicts with at least one active year and a recorded prevalence.
#
# Rows are selected with which() so that a condition evaluating to NA (e.g.
# max_prev is NA) is dropped. With plain logical indexing such rows become an
# NA conflictid, which unique() counts as one extra conflict.

# how many conflicts feature SV by any actor? Share?
load("Daten/Master_all.RData")
rated.all <- Master[which(Master$conflictyear == 1 & !is.na(Master$max_prev)), ]
unique.conflicts <- length(unique(rated.all$conflictid))
unique.conflicts.sv <- length(unique(rated.all$conflictid[rated.all$max_prev > 0]))
(unique.conflicts.sv / unique.conflicts) * 100

# how many conflicts feature SV by states? Share?
load("Daten/Master_states.RData")
rated.states <- states[which(states$conflictyear == 1 & !is.na(states$max_prev)), ]
unique.conflicts.states <- length(unique(rated.states$conflictid))
unique.conflicts.states.sv <- length(unique(rated.states$conflictid[rated.states$max_prev > 0]))
(unique.conflicts.states.sv / unique.conflicts.states) * 100

# Save
#ggsave()


#########################################
## TOTAL COUNT OF ACTIVE CONFLICTS WITH SV PER YEAR SPLIT BY CONFLICT-TYPES ##
#########################################

# Stacked area plot (e): number of active conflicts with SV per year, split by
# conflict type.

# only take ongoing conflicts, not interim and post-conflict years, and keep
# only rows with a recorded, non-zero SV prevalence
load("Daten/Master_all.RData")
counts <- subset(Master, conflictyear == 1 & !is.na(max_prev) & max_prev != 0)

# get rid of duplicate conflicts (one row per conflict-year)
counts <- counts[!duplicated(counts[c("conflictid", "year")]), ]

# Give 'year' a level for every year in the observed range, so that xtabs()
# emits a (possibly zero) cell for each year/type combination. This replaces
# the former hand-written zero rows, which only duplicated cells xtabs()
# already produced.
counts$year <- factor(counts$year,
                      levels = seq(min(counts$year), max(counts$year)))

# N° of conflicts with SV by conflict type (type); keeps 0-categories
counts <- as.data.frame(xtabs(~ year + type, data = counts))
counts <- counts[order(counts$type, counts$year), ] # important for stacking!

# NOTE(review): the labels are applied to the sorted type codes found in the
# data and assume exactly three of them -- confirm against the codebook.
counts$type <- factor(counts$type, labels = c("Interstate", "Intrastate", "Internationalized"))

# Plot (x is a factor here, hence the discrete x scale)
g3 <- ggplot(counts)
g3 <- g3 + geom_area(aes(x = year, y = Freq, group = type, fill = type), position = "stack")
g3 <- g3 + labs(title = "(e) N° of conflicts with SV per year", y = "Frequency", x = "")
g3 <- g3 + scale_x_discrete(breaks = c("1990", "1995", "2000", "2005"))
g3 <- g3 + scale_fill_manual(values = c("#F2615E", "#6CA206", "#1BB3B7"),
                             name = "Conflict\ntype")
g3 <- g3 + theme_MA()
g3

# Save
#ggsave()


#######################################
## TOTAL COUNT PER YEAR SPLIT BY SV OCCURENCE ##
#######################################

# Stacked area plot (d): number of actor-conflicts with SV per year, split by
# SV prevalence category (max_prev).

# only take ongoing conflicts, not interim and post-conflict years, and keep
# only actor-conflict-years with a recorded, non-zero SV prevalence
load("Daten/Master_all.RData")
Master <- subset(Master, conflictyear == 1 & !is.na(max_prev) & max_prev != 0)

# N° of actor-conflicts with SV by year and prevalence category
counts <- plyr::count(Master, c("year", "max_prev"))

# geom_area needs a value for every year in every category, otherwise the
# stacked areas are interpolated across the gap. Add a zero row for each
# missing year/category cell instead of hard-coding the empty years.
if (nrow(counts) > 0) {
  all.cells <- expand.grid(year = seq(min(counts$year), max(counts$year)),
                           max_prev = 1:3)
  is.missing <- !(paste(all.cells$year, all.cells$max_prev) %in%
                    paste(counts$year, counts$max_prev))
  if (any(is.missing)) {
    counts <- rbind(counts, data.frame(all.cells[is.missing, ], freq = 0))
  }
}
counts <- counts[order(counts$max_prev, counts$year), ] # important for stacking!

# explicit levels keep each label on its own category even if one is absent
counts$max_prev <- factor(counts$max_prev, levels = 1:3, labels = c("1", "2", "3"))

# Plot ('ymax' is not an aesthetic of geom_area and has been dropped; axis
# labels now match the sibling plots)
g4 <- ggplot(counts)
g4 <- g4 + geom_area(aes(x = year, y = freq, group = max_prev, fill = max_prev), position = "stack")
g4 <- g4 + scale_y_continuous(expand = c(0, 0), limits = c(0, 40))
g4 <- g4 + scale_x_continuous(expand = c(0, 0))
g4 <- g4 + labs(title = "(d) N° of actor-conflicts with SV per year split by SV-occurence", y = "Frequency", x = "")
g4 <- g4 + scale_fill_manual(values = c("#F2615E", "#6CA206", "#1BB3B7"),
                             name = "Prevalence\nof SV",
                             labels = c("Isolated", "Widespread", "Massive"))
g4 <- g4 + theme_MA()
g4



##
# PLOT THEM ALL
##
## A4 ratio, 72 PPI: 595 Pixels	842 Pixels
##

# Arrange the five plots on one page using a 2 x 6 grid layout:
# row 1 holds (a) and (b) at three columns each, row 2 holds (c), (d) and (e)
# at two columns each.
# The grid functions are namespace-qualified because this script never
# attaches the 'grid' package itself.
grid::grid.newpage()
grid::pushViewport(grid::viewport(layout = grid::grid.layout(2, 6)))

# Viewport covering layout row(s) `x` and column(s) `y` of the pushed layout.
vplayout <- function(x, y) {
  grid::viewport(layout.pos.row = x, layout.pos.col = y)
}

print(g2, vp = vplayout(1, 1:3))
print(g5, vp = vplayout(1, 4:6))
print(g, vp = vplayout(2, 1:2))
print(g4, vp = vplayout(2, 3:4))
print(g3, vp = vplayout(2, 5:6))


###############################################
## SV PREVALENCE TABLES (STATES / CONFLICTS) ##
###############################################
# each table shall look like this: name | cat. 1 | cat. 2 | cat. 3 | sum

# Count rows of `data` per value of `key` and SV prevalence category.
#
# data:      data frame with a `max_prev` column and the column named by `key`
# key:       name of the column to tabulate by (e.g. "actor", "conflictid")
# key_label: name given to the first column of the result
#
# Returns a data frame with columns <key_label>, SV1, SV2, SV3, Total: one row
# per key value that has at least one row with max_prev in 1..3, sorted by
# Total, SV3, SV2, SV1 (all decreasing), followed by a final "Total" row.
# The key column is character; the count columns are numeric. The total row is
# built as a data frame, because binding a cbind() character matrix would
# coerce every count column to character.
sv_category_table <- function(data, key, key_label) {
  with_sv <- data[data$max_prev %in% 1:3, , drop = FALSE]
  # fixed levels guarantee all three category columns, even if one is empty
  tab <- table(with_sv[[key]], factor(with_sv$max_prev, levels = 1:3))
  # a factor key keeps unused levels after subsetting; drop their zero rows
  tab <- tab[rowSums(tab) > 0, , drop = FALSE]

  out <- data.frame(
    key = as.character(rownames(tab)),
    SV1 = as.integer(tab[, "1"]),
    SV2 = as.integer(tab[, "2"]),
    SV3 = as.integer(tab[, "3"]),
    stringsAsFactors = FALSE
  )
  out$Total <- out$SV1 + out$SV2 + out$SV3

  # before adding the final (sum) row, order decreasingly
  out <- out[order(out$Total, out$SV3, out$SV2, out$SV1, decreasing = TRUE), ]

  totals <- data.frame(
    key = "Total",
    SV1 = sum(out$SV1),
    SV2 = sum(out$SV2),
    SV3 = sum(out$SV3),
    Total = sum(out$Total),
    stringsAsFactors = FALSE
  )
  out <- rbind(out, totals)
  names(out)[1] <- key_label
  out
}

#####################################
## WHICH STATES COMMIT SV? (TABLE) ##
#####################################

# only take ongoing conflicts, not interim and post-conflict years
load("Daten/Master_states.RData")
states <- subset(states, conflictyear == 1 & !is.na(max_prev))

# N° of state-conflict-years with SV (per SV category, per state)
counts.sv.states <- sv_category_table(states, "actor", "States")

# export


###############################################################
## WHICH CONFLICTS FEATURE THE HIGHEST AMOUNT OF SV BY STATES? (TABLE) ##
###############################################################

# same filtered state data as above (reloaded so this section runs on its own)
load("Daten/Master_states.RData")
states <- subset(states, conflictyear == 1 & !is.na(max_prev))

# N° of state-conflict-years with SV (per SV category, per conflict)
counts.sv.conflicts <- sv_category_table(states, "conflictid", "Conflict")


###############################################################
## WHICH CONFLICTS FEATURE THE HIGHEST AMOUNT OF SV BY ALL ACTORS? (TABLE) ##
###############################################################

# only take ongoing conflicts, not interim and post-conflict years
load("Daten/Master_all.RData")
Master <- subset(Master, conflictyear == 1 & !is.na(max_prev))

# N° of actor-conflict-years with SV (per SV category, per conflict)
all.counts.sv.conflicts <- sv_category_table(Master, "conflictid", "Conflict")
